# Selenium practice project: scrape blockchain-related job postings from BOSS Zhipin (zhipin.com).

import time
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from lxml import etree
from selenium import webdriver


class Boss:
    """Selenium-driven scraper that prints job postings listed on zhipin.com.

    The crawl starts at ``self.url`` (a search-results page), opens selected
    postings in a second browser tab, prints the extracted fields, and then
    follows the "next page" link until that link is marked as disabled.
    """

    # Site root; the hrefs scraped from the listing are appended to it.
    basic_url = 'https://www.zhipin.com'
    # Local path of the chromedriver executable passed to webdriver.Chrome.
    exepath = r'E:\programlibs\chromedriver\chromedriver.exe'
    # Upper bound, in seconds, for every explicit wait performed by the crawler.
    wait_timeout = 10

    def __init__(self):
        """Start a Chrome session and remember the first results page to load."""
        # NOTE(review): the driver path is passed positionally, which is the
        # Selenium 3 calling convention -- confirm the installed version.
        self.driver = webdriver.Chrome(self.exepath)
        # The ``query`` parameter is the URL-encoded Chinese word for "blockchain".
        self.url = 'https://www.zhipin.com/c101270100/?query=%E5%8C%BA%E5%9D%97%E9%93%BE&page=1&ka=page-1'

    @staticmethod
    def _first(nodes, default=''):
        """Return the first item of *nodes*, or *default* when it is empty."""
        return nodes[0] if nodes else default

    def _wait_for(self, xpath):
        """Block until an element matching *xpath* is present and return it.

        Raises whatever ``WebDriverWait.until`` raises when ``wait_timeout``
        seconds pass without a match.
        """
        return WebDriverWait(driver=self.driver, timeout=self.wait_timeout).until(
            EC.presence_of_element_located((By.XPATH, xpath))
        )

    def run(self):
        """Scrape every results page, following "next" until it is disabled."""
        self.driver.get(self.url)
        while True:
            # Wait for the listing BEFORE snapshotting the page source;
            # otherwise the HTML handed to parse_url may predate the listing.
            self._wait_for('//div[@class="job-list"]')
            self.parse_url(self.driver.page_source)

            next_button = self._wait_for('//a[@ka="page-next"]')
            # get_attribute returns None when the attribute is absent.
            css_class = next_button.get_attribute('class') or ''
            if "next disabled" in css_class:
                break
            next_button.click()
            print(self.driver.current_url)

    def parse_url(self, source):
        """Extract posting links from a results page and visit them.

        Args:
            source: HTML text of a results page.
        """
        self._wait_for('//div[@class="job-list"]')
        html = etree.HTML(source)

        # NOTE(review): the [-2:-1] slice keeps only the second-to-last link
        # of each page; this looks like a debugging limit -- confirm before
        # widening it, since that multiplies the number of pages requested.
        links = html.xpath(
            '//div[@class="job-list"]/ul/li//h3[@class="name"]/a/@href'
        )[-2:-1]
        for link in links:
            self.detail_run(self.basic_url + link)
            # Pause between postings.
            time.sleep(1)

    def detail_run(self, url):
        """Open *url* in a new tab, wait for it to render, and parse it.

        Args:
            url: absolute URL of a posting's detail page.
        """
        # Pass the URL as a script argument rather than splicing it into the
        # JavaScript text, so quotes in a scraped href cannot break the script.
        self.driver.execute_script('window.open(arguments[0])', url)
        self.driver.switch_to.window(self.driver.window_handles[1])
        # Read the page source only once the salary element exists; reading
        # it right after window.open can return a page that has not loaded.
        self._wait_for('//span[@class="salary"]')
        self.parse_detail(self.driver.page_source)

    def parse_detail(self, source):
        """Print the fields of one posting, then close its tab.

        Fields that cannot be found in *source* are reported as an empty
        string instead of aborting the crawl. After printing, the current
        window is closed and focus returns to the first window handle.

        Args:
            source: HTML text of a posting's detail page.
        """
        self._wait_for('//span[@class="salary"]')
        html = etree.HTML(source)

        salary = self._first(html.xpath('//span[@class="salary"]/text()'))
        # First four text nodes of the summary paragraph.
        requirements = html.xpath('//div[@class="info-primary"]/p//text()')[:4]
        position = self._first(html.xpath('//div[@class="info-primary"]//h1/text()'))
        description = self._first(html.xpath('//div[@class="job-sec"]/div/text()'))

        job = {
            'position': position,
            'request': requirements,
            'salary': salary,
            'subscibe': description,
        }
        print(job)
        print("*" * 30)
        time.sleep(2)
        self.driver.close()
        self.driver.switch_to.window(self.driver.window_handles[0])

if __name__=='__main__':
    boss=Boss()
    try:
        boss.run()
    finally:
        # Always end the browser session, even when the crawl raises, so the
        # Chrome window and the chromedriver process are not left running.
        boss.driver.quit()